*************************************************************************************
************** Project: Robots and Household Financial Behavior *********************
***************************  Gomes, Jansson and Karabulut ***************************
******************************** SAMPLE CONSTRUCTION ********************************
*************************************************************************************
****************************** LAST MODIFIED ****************************************
******************************** 2023-06-07 *****************************************
** Stata Settings ***
* Start from an empty session: "cap" suppresses any error raised by clear all,
* then every macro is dropped and the data in memory are cleared once more.
cap clear all
macro drop _all
clear
* Resource limits. NOTE(review): set mem looks like a legacy setting from older
* Stata releases - confirm it is still needed on the version used to run this file.
set mem 1000m
set matsize 11000
set maxvar 10000
* Do not pause the output for --more-- prompts
set more off
**Define Paths**
* data  : folder holding the robot data that is merged in below
* projf : project folder holding the LINDA sample files and the logs sub-folder
* Both placeholders must be replaced with real paths before the file is run.
global data "DEFINE THE PATH HERE"
global projf "DEFINE THE PATH HERE"
*** Load the data ***
* Close any log left open by an earlier run (errors ignored), then open a fresh one
capture log close
log using "$projf/logs/RFS_revision/2_Sample_Construction_230425.log", replace
* Build the working panel: start from the 1999 extract and stack 2000 through 2007 on top
use "$projf/data/linda_sample230423_1999.dta", clear
forvalues yr = 2000/2007 {
    append using "$projf/data/linda_sample230423_`yr'.dta"
}
* Report the sample size and cross-tabulate the two sampling flags
* (nothing is dropped at this point; the flags are only tabulated)
count
tabulate sampled_f sampled_i
**Merge the LINDA data with the Robot Data (including all European Countries + US + South Korea)**
* Many household-year rows map to one industry-year row of the robot data (m:1 on SNI2_s year)
sort SNI2_s year
merge m:1 SNI2_s year using "$data/robot_data_all_updated190531.dta" 
count
**Tabulate the data**
tab _merge
*Drop robot-data rows that have no LINDA counterpart (_merge==2)*
drop if _merge==2 
drop _merge
/* Note: households whose industry has no match in the robot data (_merge==1) are kept */
count
**Merge the LINDA data with the Industry Controls Data**
* NOTE(review): this file is read from a hard-coded network path rather than from the data global - confirm the path is reachable where this file is run
sort SNI2_s year
merge m:1 SNI2_s year using "\\micro.intra\projekt\P0459$\P0459_gem\STATA\Third_project\20180618\dofiles\industrycontrols\data\5_ind_controls_190517.dta" 
count
**Tabulate the data**
tab _merge
*Drop industry-control rows that have no LINDA counterpart (_merge==2)*
drop if _merge==2 
/* Note: households whose industry has no match in the industry controls data (_merge==1) are kept. */
/* The _merge variable from this second merge is not dropped in this section. */
count
**Declare the household-year panel structure**
sort idhh year
tsset idhh year
**Flag, on the 1999 row only, whether rdensity_swe is observed for the household
**(1 = observed, 0 = missing; rows of other years stay missing)
generate robot_ind = !mi(rdensity_swe) if year == 1999
**Count of flagged households in 1999 before the age filter
tabulate robot_ind
*Keep ages 22 through 60; rows with a missing age are removed as well (Hurst&Lusardi, JPE)*
drop if !inrange(age, 22, 60)
**Count of flagged households in 1999 after the age filter
tabulate robot_ind
**Occupation codes: recode a missing ssyk to 0 and tabulate the 1999 distribution**
replace ssyk = 0 if ssyk == .
tabulate ssyk if year == 1999
*Disabled summary statistics, kept for reference:
*sum inc* parish* muni* age edulev* employed unemployed selfemployed student immigrant if ssyk!=0 & year==1999
*sum inc* parish* muni* age edulev* employed unemployed selfemployed student immigrant if ssyk==0 & year==1999
*Sampling flag among 1999 rows without (ssyk==0) and with (ssyk!=0) an occupation code
tabulate sampled_f if ssyk == 0 & year == 1999
tabulate sampled_f if ssyk != 0 & year == 1999
*********************************************************************************
*********************************************************************************
*********************************************************************************
************** DEFINE THE OUTCOME VARIABLES OF INTEREST
*********************************************************************************
*************************************************************
***** ANNUAL STATUS / DIFFERENCES IN OUTCOME VARIABLES 
*************************************************************
*Household identifier as observed in 1999 and in 2007, copied to every row of the household*
*(missing when the household has no row in that year)
*NOTE(review): the copies use the default storage type of generate - confirm idhh values survive without rounding
foreach yr in 1999 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate idhh_`yr' = idhh if year == `yr'
    bysort idhh: egen idhh_`yy' = mean(idhh_`yr')
    drop idhh_`yr'
}
*Annual stock-market entry: defined only for households with drisky==0 in the previous year*
*1 = drisky switches to 1 this year, 0 = drisky stays at 0, missing otherwise
generate entry_stock = (drisky == 1) if L.drisky == 0 & inlist(drisky, 0, 1)
label variable entry_stock "Entry in the Stock Market"
*Annual stock-market exit: defined only for households with drisky==1 in the previous year*
*1 = drisky switches to 0 this year, 0 = drisky stays at 1, missing otherwise
generate exit_stock = (drisky == 0) if L.drisky == 1 & inlist(drisky, 0, 1)
label variable exit_stock "Exit from the Stock Market"
*Year-on-year change in the risky share of the portfolio*
generate drisky_share = risky_share - L.risky_share
label variable drisky_share "Annual change in risky share"
*Annual change in log household earnings*
*ln(earnings + 1); capture skips the step silently if ln_earnings already exists
capture generate ln_earnings = ln(inchh_earned + 1)
generate dln_earnings = ln_earnings - L.ln_earnings
label variable dln_earnings "Annual change in earnings"
*Financial wealth relative to earned income, and its annual change*
*capture skips the step silently if wi_ratio already exists
capture generate wi_ratio = hhfinw/inchh_earned
*Disabled alternative denominator, kept for reference:
*gen wi_ratio=(hhfinw/inchh)
generate d_wi_ratio = wi_ratio - L.wi_ratio
label variable d_wi_ratio "Annual change in W-t-Inc"
*Net wealth in levels (hhnetw -> nw_99, nw_07) and its transform lnw (-> lnw_99, lnw_07):
*the 1999 and 2007 values are copied to every row of the household
local stub_hhnetw nw
local stub_lnw    lnw
foreach src in hhnetw lnw {
    local stub `stub_`src''
    foreach yr in 1999 2007 {
        local yy = substr("`yr'", 3, 2)
        bysort idhh: generate `stub'_`yr' = `src' if year == `yr'
        bysort idhh: egen `stub'_`yy' = mean(`stub'_`yr')
        drop `stub'_`yr'
    }
}
*1999-2007 changes, stored on the 1999 row only
generate dl_nw_99_07 = lnw_07 - lnw_99 if year == 1999
generate d_nw_99_07 = nw_07 - nw_99 if year == 1999
*Sub-sample used for the detailed summaries below
local workers (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)
summarize dl_nw_99_07 lnw_07 lnw_99 d_nw_99_07 nw_07 nw_99 if year == 1999
summarize dl_nw_99_07 lnw_07 lnw_99 d_nw_99_07 nw_07 nw_99 if year == 1999 & robot_ind == 1 & `workers', detail
*Winsorize both changes with p(.01), overwriting the raw values
foreach chg of varlist dl_nw_99_07 d_nw_99_07 {
    winsor `chg', gen(w`chg') p(.01)
    replace `chg' = w`chg'
    drop w`chg'
}
summarize dl_nw_99_07 d_nw_99_07 if year == 1999 & robot_ind == 1 & `workers', detail
*Total Savings of Households (a la Bach et al., 2018)
*tot_sav          : change in net wealth relative to the previous year
*tot_sav_rate     : tot_sav divided by the previous year's net wealth
*tot_sav_rate_inc : tot_sav divided by current household income (inchh)
bys idhh: gen tot_sav=(hhnetw-L.hhnetw)
bys idhh: gen tot_sav_rate=(tot_sav/L.hhnetw)
bys idhh: gen tot_sav_rate_inc=(tot_sav/inchh)
*Blank each rate when its own denominator is zero or negative*
replace tot_sav_rate=. if L.hhnetw==0
replace tot_sav_rate=. if L.hhnetw<0
replace tot_sav_rate_inc=. if inchh==0
replace tot_sav_rate_inc=. if inchh<0
*Winsorize the wealth-based saving rate with p(.01) and overwrite the raw values
winsor tot_sav_rate if mi(tot_sav_rate)==0, gen(wtot_sav_rate) p(.01) 
replace tot_sav_rate=wtot_sav_rate
drop wtot_sav_rate
*Winsorize the income-based saving rate with p(.01) and overwrite the raw values.
*The sample restriction must refer to tot_sav_rate_inc itself. Conditioning on
*tot_sav_rate (as this line did before) computed the cutoffs on the wrong sample and
*blanked the income-based rate wherever the wealth-based rate was missing.
winsor tot_sav_rate_inc if mi(tot_sav_rate_inc)==0, gen(wtot_sav_rate_inc) p(.01) 
replace tot_sav_rate_inc=wtot_sav_rate_inc
drop wtot_sav_rate_inc
************************************************************
***** LONG DIFFERENCES IN OUTCOME VARIABLES (1999 vs. 2007)
*************************************************************
*Stock-market participation (drisky) in 1999, 2000, 2006 and 2007, copied to every row of the household*
foreach yr in 1999 2000 2006 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate drisky_`yr' = drisky if year == `yr'
    bysort idhh: egen drisky_`yy' = mean(drisky_`yr')
    drop drisky_`yr'
}
*All transition indicators below are stored on the 1999 row only.
*Entry 1999-2007: among non-participants in 1999, 1 = participates in 2007, 0 = still out
generate entry_stock_99_07 = (drisky_07 == 1) if drisky_99 == 0 & inlist(drisky_07, 0, 1) & year == 1999
*Exit 1999-2007: among participants in 1999, 1 = out in 2007, 0 = still in
generate exit_stock_99_07 = (drisky_07 == 0) if drisky_99 == 1 & inlist(drisky_07, 0, 1) & year == 1999
*Exit 1999-2000: among participants in 1999, 1 = out in 2000, 0 = still in
generate exit_stock_99_00 = (drisky_00 == 0) if drisky_99 == 1 & inlist(drisky_00, 0, 1) & year == 1999
*Exit 2006-2007: among participants in 2006, 1 = out in 2007, 0 = still in
generate exit_stock_06_07 = (drisky_07 == 0) if drisky_06 == 1 & inlist(drisky_07, 0, 1) & year == 1999
*Risky share in 1999, 2000, 2006 and 2007, copied to every row of the household*
*First the measure including real estate (risky_share_re), then the traditional one (risky_share)
foreach src in risky_share_re risky_share {
    foreach yr in 1999 2000 2006 2007 {
        local yy = substr("`yr'", 3, 2)
        bysort idhh: generate `src'_`yr' = `src' if year == `yr'
        bysort idhh: egen `src'_`yy' = mean(`src'_`yr')
        drop `src'_`yr'
    }
}
*Change in the traditional risky share 1999-2007 (stored on the 1999 row only)
generate drisky_share_99_07 = risky_share_07 - risky_share_99 if year == 1999
*Indicator for a lower risky share in 2007 than in 1999 (0 when equal or higher)
generate drisky_decre_99_07 = (risky_share_07 < risky_share_99) if risky_share_99 != . & risky_share_07 != . & year == 1999
*Percentage change, version I: relative to the 1999 share.
*A zero 1999 share yields a missing value, except that zero in both years is set to 0.
generate per_chg_risky_99_07 = (risky_share_07 - risky_share_99)/risky_share_99 if year == 1999
replace per_chg_risky_99_07 = 0 if mi(per_chg_risky_99_07) == 1 & risky_share_07 == 0 & risky_share_99 == 0 & year == 1999
*Percentage change, version II: relative to the average of the two shares
*(the original authors cite Calvet et al (2020)); zero in both years is set to 0.
generate alt_per_chg_risky_99_07 = 2*(risky_share_07 - risky_share_99)/(risky_share_99 + risky_share_07) if year == 1999
replace alt_per_chg_risky_99_07 = 0 if mi(alt_per_chg_risky_99_07) == 1 & risky_share_07 == 0 & risky_share_99 == 0 & year == 1999
*Summary statistics: all 1999 rows, then the restricted sub-sample
local workers (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)
summarize drisky_share_99_07 risky_share_07 risky_share_99 per_chg_risky_99_07 alt_per_chg_risky_99_07 if year == 1999
summarize drisky_share_99_07 risky_share_07 risky_share_99 per_chg_risky_99_07 alt_per_chg_risky_99_07 if year == 1999 & robot_ind == 1 & `workers'
*Change in income rank (inc) and earnings rank (earn) between 1999 and 2007*
local workers (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)
foreach k in inc earn {
    *Rank in 1999 and in 2007, copied to every row of the household
    foreach yr in 1999 2007 {
        local yy = substr("`yr'", 3, 2)
        bysort idhh: generate pct_`k'_rank_`yr' = pct_`k'_rank if year == `yr'
        bysort idhh: egen pct_`k'_rank_`yy' = mean(pct_`k'_rank_`yr')
        drop pct_`k'_rank_`yr'
    }
    *1 = lower rank in 2007 than in 1999, 0 = same or higher; needs both ranks; 1999 row only
    generate `k'_lower_99_07 = (pct_`k'_rank_07 < pct_`k'_rank_99) if pct_`k'_rank_07 != . & pct_`k'_rank_99 != . & year == 1999
    summarize `k'_lower_99_07 pct_`k'_rank_07 pct_`k'_rank_99 if year == 1999
    summarize `k'_lower_99_07 pct_`k'_rank_07 pct_`k'_rank_99 if year == 1999 & robot_ind == 1 & `workers'
}
*Log change in household earnings between 1999 and 2007*
local workers (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)
*ln(earnings + 1); capture skips the step silently if ln_earnings already exists
capture generate ln_earnings = ln(inchh_earned + 1)
foreach yr in 1999 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate ln_earnings_`yr' = ln_earnings if year == `yr'
    bysort idhh: egen ln_earnings_`yy' = mean(ln_earnings_`yr')
    drop ln_earnings_`yr'
}
*Difference stored on the 1999 row only
generate dln_earnings_99_07 = ln_earnings_07 - ln_earnings_99 if year == 1999
summarize dln_earnings_99_07 ln_earnings_07 ln_earnings_99 if year == 1999
summarize dln_earnings_99_07 ln_earnings_07 ln_earnings_99 if year == 1999 & robot_ind == 1 & `workers'
*Log change in disposable income excluding capital income between 1999 and 2007*
summarize inchh_dis_nocap, detail
*Negative values are set to zero before taking ln(x + 1)
replace inchh_dis_nocap = 0 if inchh_dis_nocap < 0
capture generate ln_disp_nocap = ln(inchh_dis_nocap + 1)
foreach yr in 1999 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate ln_disp_nocap_`yr' = ln_disp_nocap if year == `yr'
    bysort idhh: egen ln_disp_nocap_`yy' = mean(ln_disp_nocap_`yr')
    drop ln_disp_nocap_`yr'
}
generate dln_disp_nocap_99_07 = ln_disp_nocap_07 - ln_disp_nocap_99 if year == 1999
summarize dln_disp_nocap_99_07 ln_disp_nocap_07 ln_disp_nocap_99 if year == 1999
summarize dln_disp_nocap_99_07 ln_disp_nocap_07 ln_disp_nocap_99 if year == 1999 & robot_ind == 1 & `workers'
*The 1999 and 2007 levels are no longer needed
drop ln_disp_nocap_99 ln_disp_nocap_07
*Log change in household financial wealth between 1999 and 2007*
local workers (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)
*ln(financial wealth + 1); capture skips the step silently if ln_finw already exists
capture generate ln_finw = ln(hhfinw + 1)
foreach yr in 1999 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate ln_finw_`yr' = ln_finw if year == `yr'
    bysort idhh: egen ln_finw_`yy' = mean(ln_finw_`yr')
    drop ln_finw_`yr'
}
*Difference stored on the 1999 row only
generate dln_finw_99_07 = ln_finw_07 - ln_finw_99 if year == 1999
summarize dln_finw_99_07 ln_finw_07 ln_finw_99 if year == 1999
summarize dln_finw_99_07 ln_finw_07 ln_finw_99 if year == 1999 & robot_ind == 1 & `workers', detail
*Winsorize the change with p(.01), overwriting the raw values
winsor dln_finw_99_07, gen(wdln_finw_99_07) p(.01)
replace dln_finw_99_07 = wdln_finw_99_07
drop wdln_finw_99_07
summarize dln_finw_99_07 if year == 1999 & robot_ind == 1 & `workers', detail
*The 1999 and 2007 levels are no longer needed
drop ln_finw_99 ln_finw_07
*Log change in household debt between 1999 and 2007*
summarize hhdebt, detail
*Negative debt is set to zero before taking ln(x + 1)
replace hhdebt = 0 if hhdebt < 0 & mi(hhdebt) == 0
capture generate ln_debt = ln(hhdebt + 1)
foreach yr in 1999 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate ln_debt_`yr' = ln_debt if year == `yr'
    bysort idhh: egen ln_debt_`yy' = mean(ln_debt_`yr')
    drop ln_debt_`yr'
}
generate dln_debt_99_07 = ln_debt_07 - ln_debt_99 if year == 1999
summarize dln_debt_99_07 ln_debt_07 ln_debt_99 if year == 1999
summarize dln_debt_99_07 ln_debt_07 ln_debt_99 if year == 1999 & robot_ind == 1 & `workers', detail
*Winsorize the change with p(.01), overwriting the raw values
winsor dln_debt_99_07, gen(wdln_debt_99_07) p(.01)
replace dln_debt_99_07 = wdln_debt_99_07
drop wdln_debt_99_07
summarize dln_debt_99_07 if year == 1999 & robot_ind == 1 & `workers', detail
*The 1999 and 2007 levels are no longer needed
drop ln_debt_99 ln_debt_07
*Log change in housing assets (hhhouse) between 1999 and 2007*
local workers (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)
summarize hhhouse, detail
*Negative values are set to zero before taking ln(x + 1)
replace hhhouse = 0 if hhhouse < 0 & mi(hhhouse) == 0
*capture skips the step silently if ln_house already exists
capture generate ln_house = ln(hhhouse + 1)
foreach yr in 1999 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate ln_house_`yr' = ln_house if year == `yr'
    bysort idhh: egen ln_house_`yy' = mean(ln_house_`yr')
    drop ln_house_`yr'
}
*Difference stored on the 1999 row only
generate dln_house_99_07 = ln_house_07 - ln_house_99 if year == 1999
summarize dln_house_99_07 ln_house_07 ln_house_99 if year == 1999
summarize dln_house_99_07 ln_house_07 ln_house_99 if year == 1999 & robot_ind == 1 & `workers', detail
*Winsorize the change and both levels with p(.01), overwriting the raw values
*(the change was computed from the un-winsorized levels above)
foreach hv of varlist dln_house_99_07 ln_house_99 ln_house_07 {
    winsor `hv', gen(w`hv') p(.01)
    replace `hv' = w`hv'
    drop w`hv'
}
summarize dln_house_99_07 if year == 1999 & robot_ind == 1 & `workers', detail
*The 1999 and 2007 levels are kept here; the drop is disabled:
*drop ln_house_99 ln_house_07
********
*Change in real assets (hhrealassets) between 1999 and 2007, in logs and in levels*
local workers (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)
summarize hhrealassets, detail
*Negative values are set to zero before taking ln(x + 1)
replace hhrealassets = 0 if hhrealassets < 0 & mi(hhrealassets) == 0
*capture skips the step silently if ln_ra already exists
capture generate ln_ra = ln(hhrealassets + 1)
*Log and level values in 1999 and 2007, copied to every row of the household
foreach yr in 1999 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate ln_ra_`yr' = ln_ra if year == `yr'
    bysort idhh: egen ln_ra_`yy' = mean(ln_ra_`yr')
    bysort idhh: generate ra_`yr' = hhrealassets if year == `yr'
    bysort idhh: egen ra_`yy' = mean(ra_`yr')
    drop ln_ra_`yr' ra_`yr'
}
*Differences stored on the 1999 row only
generate dln_ra_99_07 = ln_ra_07 - ln_ra_99 if year == 1999
generate d_ra_99_07 = ra_07 - ra_99 if year == 1999
summarize dln_ra_99_07 ln_ra_07 ln_ra_99 d_ra_99_07 ra_07 ra_99 if year == 1999
summarize dln_ra_99_07 ln_ra_07 ln_ra_99 d_ra_99_07 ra_07 ra_99 if year == 1999 & robot_ind == 1 & `workers', detail
*Winsorize changes and levels with p(.01), overwriting the raw values
foreach rv of varlist dln_ra_99_07 d_ra_99_07 ln_ra_99 ln_ra_07 ra_99 ra_07 {
    winsor `rv', gen(w`rv') p(.01)
    replace `rv' = w`rv'
    drop w`rv'
}
summarize dln_ra_99_07 d_ra_99_07 if year == 1999 & robot_ind == 1 & `workers', detail
*Same long difference for lre (labelled IHS of real assets by the original authors)*
foreach yr in 1999 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate lre_`yr' = lre if year == `yr'
    bysort idhh: egen lre_`yy' = mean(lre_`yr')
    drop lre_`yr'
}
generate dlre_99_07 = lre_07 - lre_99 if year == 1999
summarize dlre_99_07 lre_07 lre_99 if year == 1999
summarize dlre_99_07 lre_07 lre_99 if year == 1999 & robot_ind == 1 & `workers', detail
*Winsorize the change and both levels with p(.01), overwriting the raw values
foreach rv of varlist dlre_99_07 lre_99 lre_07 {
    winsor `rv', gen(w`rv') p(.01)
    replace `rv' = w`rv'
    drop w`rv'
}
summarize dlre_99_07 if year == 1999 & robot_ind == 1 & `workers', detail
*Change in total assets (hhtotalassets) between 1999 and 2007, in logs and in levels*
local workers (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)
summarize hhtotalassets, detail
*Negative values are set to zero before taking ln(x + 1)
replace hhtotalassets = 0 if hhtotalassets < 0 & mi(hhtotalassets) == 0
*capture skips the step silently if ln_ta already exists
capture generate ln_ta = ln(hhtotalassets + 1)
*Log and level values in 1999 and 2007, copied to every row of the household
foreach yr in 1999 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate ln_ta_`yr' = ln_ta if year == `yr'
    bysort idhh: egen ln_ta_`yy' = mean(ln_ta_`yr')
    bysort idhh: generate ta_`yr' = hhtotalassets if year == `yr'
    bysort idhh: egen ta_`yy' = mean(ta_`yr')
    drop ln_ta_`yr' ta_`yr'
}
*Differences stored on the 1999 row only
generate dln_ta_99_07 = ln_ta_07 - ln_ta_99 if year == 1999
generate d_ta_99_07 = ta_07 - ta_99 if year == 1999
summarize dln_ta_99_07 ln_ta_07 ln_ta_99 d_ta_99_07 ta_07 ta_99 if year == 1999
summarize dln_ta_99_07 ln_ta_07 ln_ta_99 d_ta_99_07 ta_07 ta_99 if year == 1999 & robot_ind == 1 & `workers', detail
*Winsorize changes and levels with p(.01), overwriting the raw values
foreach tv of varlist dln_ta_99_07 d_ta_99_07 ln_ta_99 ta_99 ln_ta_07 ta_07 {
    winsor `tv', gen(w`tv') p(.01)
    replace `tv' = w`tv'
    drop w`tv'
}
summarize dln_ta_99_07 d_ta_99_07 if year == 1999 & robot_ind == 1 & `workers', detail
*Change in net-wealth rank (pct_nw_rank) between 1999 and 2007*
local workers (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)
*Rank in 1999 and in 2007, copied to every row of the household
foreach yr in 1999 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate pct_nw_rank_`yr' = pct_nw_rank if year == `yr'
    bysort idhh: egen pct_nw_rank_`yy' = mean(pct_nw_rank_`yr')
    drop pct_nw_rank_`yr'
}
*All three measures need both ranks and are stored on the 1999 row only
local both_ranks pct_nw_rank_07 != . & pct_nw_rank_99 != . & year == 1999
*1 = lower rank in 2007 than in 1999, 0 = same or higher
generate nw_lower_99_07 = (pct_nw_rank_07 < pct_nw_rank_99) if `both_ranks'
summarize nw_lower_99_07 pct_nw_rank_07 pct_nw_rank_99 if year == 1999
summarize nw_lower_99_07 pct_nw_rank_07 pct_nw_rank_99 if year == 1999 & robot_ind == 1 & `workers'
*1 = higher rank in 2007 than in 1999, 0 = same or lower
generate nw_higher_99_07 = (pct_nw_rank_07 > pct_nw_rank_99) if `both_ranks'
summarize nw_higher_99_07 pct_nw_rank_07 pct_nw_rank_99 if year == 1999
summarize nw_higher_99_07 pct_nw_rank_07 pct_nw_rank_99 if year == 1999 & robot_ind == 1 & `workers'
*Signed change in rank
generate dnw_pct_99_07 = pct_nw_rank_07 - pct_nw_rank_99 if `both_ranks'
summarize dnw_pct_99_07 if year == 1999 & robot_ind == 1 & `workers', detail
*Unemployment status in 2006 and in 2007, copied to every row of the household*
bysort idhh: generate unempl_2006 = unemployed if year == 2006
bysort idhh: egen unemp_06 = mean(unempl_2006)
bysort idhh: generate unempl_2007 = unemployed if year == 2007
bysort idhh: egen unemp_07 = mean(unempl_2007)
drop unempl_2006 unempl_2007
*Original authors' note: because unemployed people are dropped from the sample,
*these variables measure the transition to unemployment.
*Wealth-to-income ratio in 1999, 2000, 2006 and 2007, plus financial wealth in 1999 and 2007,*
*each copied to every row of the household
foreach yr in 1999 2000 2006 2007 {
    local yy = substr("`yr'", 3, 2)
    bysort idhh: generate wi_ratio_`yr' = wi_ratio if year == `yr'
    bysort idhh: egen wi_ratio_`yy' = mean(wi_ratio_`yr')
    drop wi_ratio_`yr'
    if inlist(`yr', 1999, 2007) {
        bysort idhh: generate hhfinw_`yr' = hhfinw if year == `yr'
        bysort idhh: egen hhfinw_`yy' = mean(hhfinw_`yr')
        drop hhfinw_`yr'
    }
}
*Winsorize the snapshots with p(.01), overwriting the raw values
foreach snap of varlist wi_ratio_99 wi_ratio_00 wi_ratio_06 wi_ratio_07 hhfinw_99 hhfinw_07 {
    winsor `snap', gen(w`snap') p(.01)
    replace `snap' = w`snap'
    drop w`snap'
}
*Change in the (winsorized) ratio between 1999 and 2007; filled on every row of the household
generate d_wi_ratio_99_07 = wi_ratio_07 - wi_ratio_99
label variable d_wi_ratio_99_07 "Chng in W-Inc Ratio"
local workers (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)
summarize d_wi_ratio_99_07 wi_ratio_07 wi_ratio_99 if year == 1999
summarize d_wi_ratio_99_07 wi_ratio_07 wi_ratio_99 if year == 1999 & robot_ind == 1 & `workers', detail
*Winsorized copy with p(.05), kept next to the raw change as wd_wi_ratio_99_07
*(the raw variable is deliberately not overwritten here)
winsor d_wi_ratio_99_07, gen(wd_wi_ratio_99_07) p(.05)
summarize d_wi_ratio_99_07 wd_wi_ratio_99_07 if year == 1999 & robot_ind == 1 & `workers', detail

*Three wealth-to-earned-income ratios, each with 1999, 2000, 2006 and 2007 snapshots*
*  nwi_ratio   : net wealth / earned income
*  tai_ratio   : total assets / earned income
*  fwrei_ratio : (financial wealth + real assets) / earned income
local top_nwi   hhnetw
local top_tai   hhtotalassets
local top_fwrei (hhfinw+hhrealassets)
foreach r in nwi tai fwrei {
    *capture skips the step silently if the ratio already exists
    capture generate `r'_ratio = (`top_`r'')/inchh_earned
    *Copy each year's value to every row of the household
    foreach yr in 1999 2000 2006 2007 {
        local yy = substr("`yr'", 3, 2)
        bysort idhh: generate `r'_ratio_`yr' = `r'_ratio if year == `yr'
        bysort idhh: egen `r'_ratio_`yy' = mean(`r'_ratio_`yr')
        drop `r'_ratio_`yr'
    }
    *Winsorize the four snapshots with p(.01), overwriting the raw values
    foreach snap of varlist `r'_ratio_99 `r'_ratio_00 `r'_ratio_06 `r'_ratio_07 {
        winsor `snap', gen(w`snap') p(.01)
        replace `snap' = w`snap'
        drop w`snap'
    }
}
*Total saving rates at the start and at the end of the period*
*The start value (suffix 99) is taken from the year-2000 row; the original authors note
*that the first saving-rate observation is from year 2000. The end value is from 2007.
local srcyear_99 2000
local srcyear_07 2007
foreach yy in 99 07 {
    foreach rate in tot_sav_rate tot_sav_rate_inc {
        bysort idhh: generate `rate'_hold = `rate' if year == `srcyear_`yy''
        bysort idhh: egen `rate'_`yy' = mean(`rate'_hold)
        drop `rate'_hold
    }
}
*Change between the two snapshots; filled on every row of the household
generate d_tot_sav_rate_99_07 = tot_sav_rate_07 - tot_sav_rate_99
label variable d_tot_sav_rate_99_07 "Change in Tot Sav Rate"
generate d_tot_sav_rate_inc_99_07 = tot_sav_rate_inc_07 - tot_sav_rate_inc_99
label variable d_tot_sav_rate_inc_99_07 "Change in Tot Sav Rate Inc"
**Identify households that change industry between 1999 and 2006
**(per the original authors, 2006 is used because industry codes changed in 2007)
*Both years' SNI2_s strings are encoded against ONE shared value label, so a given
*industry string gets the same integer code in 1999 and in 2006. Encoding each year
*with its own label numbers the strings by their alphabetical position within that
*year only, which makes the codes non-comparable whenever the two years do not
*contain exactly the same set of industries.
cap label drop sni2_s_code
*1999
bys idhh: gen SNI2_s_1999=SNI2_s if year==1999
bys idhh: gen sni_2_1999=sni_2 if year==1999
*String var to a numeric var (creates the shared label)*
encode SNI2_s_1999, gen(eSNI2_s_1999) label(sni2_s_code)
bys idhh: egen eSNI2_s_99=mean(eSNI2_s_1999)
drop SNI2_s_1999 eSNI2_s_1999
bys idhh: egen sni_2_99=mean(sni_2_1999)
drop sni_2_1999
*2006
bys idhh: gen SNI2_s_2006=SNI2_s if year==2006
bys idhh: gen sni_2_2006=sni_2 if year==2006
*String var to a numeric var (reuses the 1999 mapping; new strings are added to the label)*
encode SNI2_s_2006, gen(eSNI2_s_2006) label(sni2_s_code)
bys idhh: egen eSNI2_s_06=mean(eSNI2_s_2006)
drop SNI2_s_2006 eSNI2_s_2006
bys idhh: egen sni_2_06=mean(sni_2_2006)
drop sni_2_2006
*The shared label is no longer attached to any variable
label drop sni2_s_code
*Define an Indicator for Industry-Switchers*
*Missing handling is unchanged: a missing code on one side only compares as unequal, so
*the household is coded 1 (for switcher_99_07 only when unemp_07==0 also holds).
*Missing on both sides compares as equal and is coded 0.
gen switcher_99_07=1 if (eSNI2_s_99!=eSNI2_s_06) & unemp_07==0
replace switcher_99_07=0 if (eSNI2_s_99==eSNI2_s_06)
la var switcher_99_07 "Switcher"
gen switcher_99_06=1 if (eSNI2_s_99!=eSNI2_s_06)
replace switcher_99_06=0 if (eSNI2_s_99==eSNI2_s_06)
*Disabled: blank the indicator when the 2006 code is missing
*replace switcher_99_06=. if (eSNI2_s_06==.)
la var switcher_99_06 "Switcher (99-06)"
sum switcher*
*Same indicator based on the numeric sni_2 code (1 = differs between 1999 and 2006)*
gen switcher2_99_06=1 if (sni_2_99!=sni_2_06)
replace switcher2_99_06=0 if (sni_2_99==sni_2_06)
*Disabled: blank the indicator when the 2006 code is missing
*replace switcher2_99_06=. if (sni_2_06==.)
*Homeownership (homeow) and real-estate ownership (realeow) status in 1999 and 2007*
*X_yy is the household mean of X over its year-yy rows, copied to every row of the household
foreach yr in 1999 2007 {
	local yy = substr("`yr'", 3, 2)
	foreach own in homeow realeow {
		bys idhh: gen `own'_`yr' = `own' if year==`yr'
		bys idhh: egen `own'_`yy' = mean(`own'_`yr')
	}
	drop homeow_`yr' realeow_`yr'
}
*Entry into the house / real-estate market over 1999-2007
*Defined on 1999 rows of non-owners in 1999: 1 if owner in 2007, 0 if still non-owner.
*Missing for 1999 owners, for other years, and when the 2007 status is not exactly 0 or 1.
gen buy_house_99_07 = (homeow_07==1) if homeow_99==0 & inlist(homeow_07, 0, 1) & year==1999
gen buy_reale_99_07 = (realeow_07==1) if realeow_99==0 & inlist(realeow_07, 0, 1) & year==1999
*Exit from the house / real-estate market over 1999-2007
*Defined on 1999 rows of owners in 1999: 1 if non-owner in 2007, 0 if still owner.
*Missing for 1999 non-owners, for other years, and when the 2007 status is not exactly 0 or 1.
gen sell_house_99_07 = (homeow_07==0) if homeow_99==1 & inlist(homeow_07, 0, 1) & year==1999
gen sell_reale_99_07 = (realeow_07==0) if realeow_99==1 & inlist(realeow_07, 0, 1) & year==1999
****NEW DIFFERENCES FOR HOUSEHOLD CONTROLS***
*For each control X below: X_yy is the household mean of X over its year-yy rows,
*copied to every row of the household; dX_99_07 = X_07 - X_99, filled on 1999 rows only.

**Difference in log disposable income (linc) and change in being married*
*The 1999 and 2007 levels are only inputs here and are dropped after differencing
foreach ctrl in linc married {
	foreach yr in 1999 2007 {
		local yy = substr("`yr'", 3, 2)
		bys idhh: gen `ctrl'_`yr' = `ctrl' if year==`yr'
		bys idhh: egen `ctrl'_`yy' = mean(`ctrl'_`yr')
		drop `ctrl'_`yr'
	}
	gen d`ctrl'_99_07 = (`ctrl'_07 - `ctrl'_99) if year==1999
	drop `ctrl'_07 `ctrl'_99
}

**Change in number of children and number of adults*
*Levels for 1999, 2000, 2006 and 2007 are built and all of them are kept in the data
foreach ctrl in nr_children nr_adults {
	foreach yr in 1999 2000 2006 2007 {
		local yy = substr("`yr'", 3, 2)
		bys idhh: gen `ctrl'_`yr' = `ctrl' if year==`yr'
		bys idhh: egen `ctrl'_`yy' = mean(`ctrl'_`yr')
		drop `ctrl'_`yr'
	}
	gen d`ctrl'_99_07 = (`ctrl'_07 - `ctrl'_99) if year==1999
	*drop of the _07 and _99 levels intentionally left out
}

**Change in college and high school education*
*A change of exactly -1 is recoded to 0; the levels are dropped after differencing
foreach ctrl in college hschool {
	foreach yr in 1999 2007 {
		local yy = substr("`yr'", 3, 2)
		bys idhh: gen `ctrl'_`yr' = `ctrl' if year==`yr'
		bys idhh: egen `ctrl'_`yy' = mean(`ctrl'_`yr')
		drop `ctrl'_`yr'
	}
	gen d`ctrl'_99_07 = (`ctrl'_07 - `ctrl'_99) if year==1999
	replace d`ctrl'_99_07 = 0 if d`ctrl'_99_07==-1 & year==1999
	drop `ctrl'_07 `ctrl'_99
}
**Changed municipality (move)*
*gen move_99_07 = 0
*replace move_99_07 = 1 if muni2007!=muni1999  & year==1999
**Summary statistics*
*Every summary below is restricted to 1999 rows with robot_ind==1, excluding
*self-employed, students, retirees and unemployed; the shared condition is kept in one macro
local s99 "year == 1999 & robot_ind == 1 & (selfemployed!=1 & student!=1 & retired!=1 &unemployed!=1)"
*Detailed distribution of the 1999-2007 changes in household controls
sum dlinc_99_07 dmarried_99_07 dnr_children_99_07 dnr_adults_99_07 dcollege_99_07 dhschool_99_07 /*move_99_07*/ if `s99', d
*exit_stock_99_07 and unemp_07: overall, then each one split by the value of the other
sum exit_stock_99_07 unemp_07 if `s99'
sum exit_stock_99_07 if `s99' & unemp_07 == 1
sum exit_stock_99_07 if `s99' & unemp_07 == 0
sum unemp_07 if `s99' & exit_stock_99_07 == 1
sum unemp_07 if `s99' & exit_stock_99_07 == 0
***************************************************************
*Final sample filter: remove every row flagged as self-employed, student, retired or unemployed
*Row count before the filter
count
drop if inlist(1, selfemployed, student, retired, unemployed)
**Number of "Treated" Households as of 1999 after the filters
tabulate robot_ind
*Write the panel to disk (overwrites an existing file) and report the final row count
save "$projf/data/Linda_panel_robot230423.dta", replace
count
**Close the log-file
capture log close

*cf _all using "$projf/data/Linda_panel_robot230413.dta"
*cf _all using "$projf/data/Linda_panel_robot211216.dta"
*cf _all using "$projf/data/Linda_panel_robot210910.dta"
